home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Shareware Grab Bag
/
Shareware Grab Bag.iso
/
050
/
bix02.arc
/
INVERT.PAS
< prev
next >
Wrap
Pascal/Delphi Source File
|
1986-08-04
|
16KB
|
381 lines
(* INVERT.PAS version 0.10 3/23/86 *)
(* Adds support for file size up to 4 megabytes *)
(*******************************************************************)
(* *)
(* INVERT *)
(* *)
(* Copyright 1986 by Mark J. Welch *)
(* P.O. Box 2409, SF, CA 94126 *)
(* *)
(* Portions copyright 1986 by JimKeo *)
(* *)
(* Last revised 3/22/86 by MJW *)
(* *)
(* 1) Read text files and "invert" them into a sorted list of *)
(* words and their locations within the file. *)
(* 2) Store inversion to disk file *)
(* 3) Later, modify program to allow unlimited expandability *)
(* of inversion file and easy search/access to keywords *)
(* *)
(* Words are stored in a binary tree, and location pointers *)
(* are stored in a linked list rooted at each tree entry. *)
(* *)
(*******************************************************************)
(**********************************************************************)
(* *)
(* WARNING *)
(* *)
(* WARNING: To save on execution time, the program does not now check *)
(* available heap space before calling New(). As a result, it will *)
(* crash if it runs out of memory, usually after about an hour of *)
(* execution; if it crashes, all work is lost. It also crashes if too *)
(* many files are inverted at one time. This error-handling should be *)
(* improved in any "release" version. -MJW *)
(* *)
(**********************************************************************)
Program Invert;
(* Invert a text file for quick searching, etc. *)
CONST
FileNameLength = 20; (* 14 is probably sufficient, but... *)
WordLength = 14; (* truncate words at 14 characters *)
MaxFiles = 256; (* maximum number of files to invert *)
(* -- needed for FileList array, but *)
(* could be replaced with a linked *)
(* list later. *)
TYPE
S = String[WordLength];
FileName = String[FileNameLength];
FileNum = Integer;
FileLists = Array[1..MaxFiles] of FileName;
LocationPtr = ^LocationType;
LocationType = RECORD
F: FileNum; (* index into FileList *)
FilBlockPtr: Integer;(* 128-byte block offset *)
FilPosPtr: Byte; (* byte offset in file *)
Next: LocationPtr; (* next location in linked list *)
END;
WordPtr = ^WordType;
WordType = RECORD
Location: LocationPtr; (* pointer into file *)
Parent: WordPtr; (* in b-tree *)
SmallerChild: WordPtr; (* less-than *)
GreaterChild: WordPtr; (* greater-than *)
Text: S; (* the word *)
END;
VAR
FileList: FileLists; (* list of files that are being inverted *)
NumFiles: Integer; (* number of files inverted so far *)
WordRoot: WordPtr; (* root of the b-tree *)
i: integer; (* index for FOR loop *)
(****************************************************************)
(* *)
(* PrintResults *)
(* *)
(* Print the entire tree in alphabetical order to a file *)
(* *)
(****************************************************************)
Procedure PrintResults;
var outfil: text;
i: integer;
(***********************************************)
(* *)
(* PrintTree *)
(* *)
(* RECURSIVE: print entire branch of tree by *)
(* first printing entire Smaller branch, then *)
(* current node text, and then entire Larger *)
(* *)
(***********************************************)
Procedure PrintTree(X: WordPtr);
Var L: LocationPtr;
i: integer;
r: real;
Begin
If X^.SmallerChild <> Nil (* leftmost children first *)
then PrintTree(X^.SmallerChild);
L := X^.Location;
If L <> Nil (* Then current node *)
then
begin
for i := 2 to ord(x^.text[0]) do
if ((x^.text[i] >= 'A') and (x^.text[i] <= 'Z'))
then x^.text[i] := chr(ord(x^.text[i])+32);
Write(outfil,X^.Text,': ');
Repeat
r := (L^.FilBlockPtr * 128.0) +L^.FilPosPtr;
Write(outfil,L^.F:1,'-',r:1:0,' ');
L := L^.Next;
Until L = Nil;
Writeln(outfil);
end;
If X^.GreaterChild <> Nil (* and then right children *)
then PrintTree(X^.GreaterChild);
End; (*PrintTree *)
begin (* PrintResults *)
Assign(outfil,'INVERT.INX');
Rewrite(outfil);
for i := 1 to NumFiles do
Writeln(outfil, i:1,' ',FileList[i]);
writeln(outfil);
PrintTree(WordRoot);
writeln(outfil);
close(outfil);
end; (* printResults *)
(*******************************************************************)
(* *)
(* Title *)
(* *)
(*******************************************************************)
Procedure Title;
Begin
Writeln;
Writeln(' Invert -- Version 0.10 -- 3/23/86');
Writeln;
Writeln('Copyright 1986 by Mark J. Welch, Box 2409, SF CA 94126');
Writeln(' (Portions Copyright 1986 by Jim Keohane)');
Writeln;
Writeln;
End;
(*******************************************************************)
(* *)
(* MemoryAvail *)
(* *)
(*******************************************************************)
Function MemoryAvail: Real;
Var M: Real;
begin
M := Memavail;
If M < 0 then M := 65536.0 + M;
MemoryAvail := M * 16;
end;
(*******************************************************************)
(* *)
(* CreateRoot *)
(* *)
(* The binary tree has to start somewhere: start it here. *)
(* *)
(*******************************************************************)
Procedure CreateRoot;
Begin
New(WordRoot);
WordRoot^.GreaterChild := Nil;
WordRoot^.SmallerChild := Nil;
WordRoot^.Text := 'Mzzzzzzzzz'; (* let's split the alphabet here *)
(* to improve initial tree balance *)
WordRoot^.Location := Nil;
WordRoot^.Parent := Nil; (* no parent for root *)
End; (* CreateRoot *)
(****************************************************************)
(* *)
(* InvertFil *)
(* *)
(* Given a file, add all its words and their locations *)
(* to the inversion tree. *)
(* *)
(****************************************************************)
Procedure InvertFile(FN: FileName);
var block:array[0..127] of char;
j,k:integer;
Fil: file;
St: S;
StPtr,CurrentLoc: LocationPtr;
c: char;
i: integer;
matchkey: boolean;
(****************************************************************)
(* *)
(* AddWord *)
(* *)
(* Given a word and a prepared location link/pointer, *)
(* add it into the inversion structure, either as a new *)
(* word or onto an existing linked-list. *)
(* *)
(****************************************************************)
Procedure AddWord(St: S; StLoc: LocationPtr);
Var CurrentWord: WordPtr;
Match: Boolean;
(**********************************************)
(* *)
(* NewChild *)
(* *)
(* Add St as a new word in tree *)
(* *)
(**********************************************)
Procedure NewChild(var X: WordPtr; var St: s);
(* match is imported as a "global" variable *)
begin
Match := true;
GetMem(X,sizeof(X^)-WordLength+Length(St));
X^.text := St;
X^.Parent := CurrentWord;
X^.SmallerChild := Nil;
X^.GreaterChild := Nil;
X^.Location := StLoc;
end; (* NewChild *)
Begin (* AddWord *)
(* First search if it exists *)
CurrentWord := WordRoot; (* start at root of tree *)
Match := False; (* haven't found the right place yet *)
Repeat
If (CurrentWord^.Text[1] = St[1])
then
if (CurrentWord^.Text = St)
then begin
match := true; (* global *)
CurrentLoc := CurrentWord^.Location;
While CurrentLoc^.Next <> Nil Do
CurrentLoc := CurrentLoc^.Next;
CurrentLoc^.Next := StLoc;
end
else if (CurrentWord^.Text > St)
then if CurrentWord^.SmallerChild <> Nil
then CurrentWord := CurrentWord^.SmallerChild
else NewChild(CurrentWord^.SmallerChild,st)
else if CurrentWord^.GreaterChild <> Nil
then CurrentWord := CurrentWord^.GreaterChild
else NewChild(CurrentWord^.GreaterChild,st)
else
if (CurrentWord^.Text[1] > St[1])
then if CurrentWord^.SmallerChild <> Nil
then CurrentWord := CurrentWord^.SmallerChild
else NewChild(CurrentWord^.SmallerChild,st)
else if CurrentWord^.GreaterChild <> Nil
then CurrentWord := CurrentWord^.GreaterChild
else NewChild(CurrentWord^.GreaterChild,st);
Until Match;
End; (* AddWord *)
Begin (* InvertFil *)
(* JimKeo-modified code included in this procedure *)
Writeln('Invert: ',FN); (* let user know what file we're fiddling with *)
NumFiles := NumFiles + 1;
If NumFiles > MaxFiles (* remove this when FileList is a linked list *)
then
begin (* crash impolitely, trashing all work done so far *)
writeln('Too many files inverted: maximum is ',MaxFiles);
Halt;
end
else FileList[NumFiles] := fn;
Assign(fil,fn);
Reset(fil);
St := '';
k := 0; {block no}
blockread(fil,block,1,j); {read first block}
while (j=1) do
begin
for i := 0 to 127 do {128 bytes per block}
begin
c:=block[i];
If ((c >= 'A') and (c < 'z'))
and ((c >= 'a') or (c <= 'Z'))
then st := st + UpCase(c)
else
if (Ord(St[0]) > 0)
then
begin
matchKey := false;
case st[1] of
'A': matchKey := (st = 'A') or (st = 'AND') or (st = 'AN');
'I': matchKey := (st = 'IN') or (st = 'IS') or
(st = 'IT') or (st = 'ITS');
'N': matchKey := (st = 'NOT');
'O': matchKey := (st = 'OR') or (st = 'ON') or (st = 'OF');
'T': matchKey := (st = 'THE') or (st = 'TO');
'Y': matchKey := (st = 'YOU');
end; (* case *)
if matchKey
then St := ''
else
begin
New(StPtr);
StPtr^.F := NumFiles;
StPtr^.FilBlockPtr := k;
StPtr^.FilPosPtr :=i-ord(st[0]); {FilePos out}
StPtr^.Next := Nil;
AddWord(St,StPtr);
St := '';
end;
end;
end;
k:=k+1; {add in 128 bytes per block}
blockread(fil,block,1,j) {read next and j=1 if more}
end;
Writeln(' Done Inverting ',FN);
End; (* InvertFile *)
(************************************************************************)
(* *)
(* Main Program Body *)
(* *)
(************************************************************************)
Begin (* Main Program *)
Title;
NumFiles := 0;
CreateRoot;
Writeln('Bytes of Available Memory at start: ',MemoryAvail:6:0);
For i := 1 to ParamCount do
begin
InvertFile(ParamStr(i));
writeln(' Bytes of Available Memory: ',MemoryAvail:6:0);
end;
Writeln('Done inverting, now storing results....');
PrintResults;
End.
(**********************************************************************)
(* p.p.s. To reduce mem reqs, I would move "Text:S" to end of record *)
(* description. Then, instead of New(recptr) I would use *)
(* GetMem(recptr,sizeof(recptr^)-WordLength+Length(St)); That way, if *)
(* St='HEAD', you save 10 bytes of memory! -JimKeo[hane] *)
(**********************************************************************)